# Results produced with R version 3.6.3 El Capitan build

# Load relevant libraries
library("stargazer")
library("ggplot2")
library("glue")
library("MatchIt")
library("dplyr")
library("grid")
library("gridExtra")
library("reshape2")
library('lemon')
library("readxl")
library("estimatr")
library("plyr")

#Step 1. Setting directory: change as needed to point to data file
setwd("C:/Users/NFran/Dropbox/JOP replication analyst/workspace/11.9.2022, 220204_R1/replication")

#Step 2. Reading Data
data<-read.csv(file="data_spainlr.csv", header=TRUE, sep=",")
                  
## Step 3. Select the variables for matching
data_for_matching = select(data, c(pobladosdummy,
										zonaregable_dummy,
										lpop1940,
										meanelevation,
										meanslope,
										soil_quality,
										roadraildensity,
										d_madrid,
										lat,
										lon,
										dist_river,
										prov,
										INE_2011t))

## Step 4. Removing NAs
trimmed_for_matching = data_for_matching %>%
  filter(!is.na(pobladosdummy) &
  			!is.na(zonaregable_dummy) &
			!is.na(lpop1940) & 
			!is.na(meanelevation) &
			!is.na(meanslope) &
			!is.na(soil_quality) &
			!is.na(roadraildensity) &
			!is.na(d_madrid) &
			!is.na(lat) &
			!is.na(lon) &
			!is.na(dist_river) &
			!is.na(prov) &
			!is.na(INE_2011t))

## Step 5. Matching procedure
m.out2 <- matchit(pobladosdummy ~ zonaregable_dummy+
				prov+
				meanelevation+
				meanslope+
				soil_quality+
				d_madrid + 
				roadraildensity + 
                  lat +
                  lon + 
                  dist_river +
                  lpop1940,
                  data = trimmed_for_matching,
                  method = "nearest",  
				exact = ~ prov+zonaregable_dummy, 
                  replace=F)
                  
## Step 6. Create a new dataframe and merge in rest of full dataset
dataspine = match.data(m.out2)
data_final_norepeats = data[,!names(data) %in%
	c("pobladosdummy","zonaregable_dummy", "lpop1940",	"meanelevation", "meanslope","soil_quality","roadraildensity","d_madrid","lat",
		"lon","dist_river","prov")]
mmdata = merge(dataspine,data_final_norepeats,by.x="INE_2011t",by.y="INE_2011t")

## Step 7. Drop municipalities with populations >50K
mmdata = mmdata[-c(which(mmdata$subclass %in% mmdata$subclass[which(mmdata$pop1970>50000)])),]

# Step 8. Generate variables on population change around and after settlement
max_yr_b_paired <- rep(NA, length(mmdata$max_yr_b))
min_yr_b_paired <- rep(NA, length(mmdata$min_yr_b))
for (u in 1:dim(mmdata)[1]){
	max_yr_b_paired[u] = mean(subset(mmdata,mmdata$subclass==mmdata$subclass[u])$max_yr_b,na.rm=T)
	min_yr_b_paired[u] = mean(subset(mmdata,mmdata$subclass==mmdata$subclass[u])$min_yr_b,na.rm=T)
}
mmdata$max_yr_b_paired = max_yr_b_paired
mmdata$min_yr_b_paired = min_yr_b_paired

mmdata$pop1980 = mmdata$pop1981
mmdata$pop1990 = mmdata$pop1991
mmdata$pop2000 = mmdata$pop2001
mmdata$pop2010 = mmdata$pop2011
popbeforecol_census <- rep(NA, length(mmdata$max_yr_b))
poparoundcol_extradecade <- rep(NA, length(mmdata$max_yr_b))
popaftercol_decade <- rep(NA, length(mmdata$max_yr_b))
popaftercol_threedecades <- rep(NA, length(mmdata$max_yr_b))
popaftercol_census <- rep(NA, length(mmdata$max_yr_b))
popaftercol_secondcensus <- rep(NA, length(mmdata$max_yr_b))
poparoundcol <- rep(NA, length(mmdata$max_yr_b))

for (u in 1:dim(mmdata)[1]){
	poparoundcol_extradecade[u] = mmdata[u,which(names(mmdata)==paste("pop",round_any(mmdata$max_yr_b_paired[u],10,ceiling)+10,sep=""))]-mmdata[u,which(names(mmdata)==paste("pop",round_any(mmdata$min_yr_b_paired[u],10,floor),sep=""))]
	popaftercol_decade[u] = mmdata[u,which(names(mmdata)==paste("pop",round_any(mmdata$max_yr_b_paired[u],10,ceiling)+10,sep=""))]-mmdata[u,which(names(mmdata)==paste("pop",round_any(mmdata$max_yr_b_paired[u],10,ceiling),sep=""))]
	popaftercol_threedecades[u] = mmdata[u,which(names(mmdata)==paste("pop",round_any(mmdata$max_yr_b_paired[u],10,ceiling)+30,sep=""))]-mmdata[u,which(names(mmdata)==paste("pop",round_any(mmdata$max_yr_b_paired[u],10,ceiling),sep=""))]
	popaftercol_census[u] = mmdata[u,which(names(mmdata)==paste("pop",round_any(mmdata$max_yr_b_paired[u],10,ceiling),sep=""))]
	popaftercol_secondcensus[u] = mmdata[u,which(names(mmdata)==paste("pop",round_any(mmdata$max_yr_b_paired[u],10,ceiling)+10,sep=""))]
	popbeforecol_census[u] = mmdata[u,which(names(mmdata)==paste("pop",round_any(mmdata$max_yr_b_paired[u],10,floor),sep=""))]
}

mmdata$poparoundcol_extradecade = poparoundcol_extradecade
mmdata$poparoundcol_extradecade_pc = poparoundcol_extradecade/popbeforecol_census
mmdata$popbeforecol_census = popbeforecol_census
mmdata$popaftercol_decade = popaftercol_decade
mmdata$popaftercol_decade_pc = popaftercol_decade/popaftercol_census
mmdata$popaftercol_secondtwodecades = popaftercol_threedecades-popaftercol_decade
mmdata$popaftercol_secondtwodecades_pc = mmdata$popaftercol_secondtwodecades/popaftercol_secondcensus
mmdata$colonist_popshare_co = (mmdata$h_colonos+mmdata$h_obreros)/mmdata$popbeforecol_census


## Step 9. Pre-treatment political differences: Figure 4 analysis
pretxvars = c("izq1936","UGT_CNT1930spercap","cclandless","ccowners","peasantunion_dummy","fosasdummy")
pretxvarnames = c("Left vote 1936","UGT/CNT union density 1930s","Landlessness 1932","Smallholders 1932","Peasant union 1932","Mass graves from Civil War")

conf.int1 <- rep(NA, length(pretxvars))
conf.int2 <- rep(NA, length(pretxvars))
estimate <- rep(NA, length(pretxvars))
conf90.int1 <- rep(NA, length(pretxvars))
conf90.int2 <- rep(NA, length(pretxvars))
stderr <- rep(NA, length(pretxvars))
p.value <- rep(NA, length(pretxvars))
obs <- rep(NA, length(pretxvars))
for (i in 1:length(pretxvars)){
	t.val = t.test(mmdata[[pretxvars[i]]][which(mmdata$pobladosdummy==1)],
				mmdata[[pretxvars[i]]][which(mmdata$pobladosdummy==0)])
	conf.int1[i] = t.val$conf.int[1]
	conf.int2[i] = t.val$conf.int[2]
	estimate[i] = t.val$estimate[1]-t.val$estimate[2]
	t.val90 = t.test(mmdata[[pretxvars[i]]][which(mmdata$pobladosdummy==1)], 
				mmdata[[pretxvars[i]]][which(mmdata$pobladosdummy==0)],conf.level=0.9)
	conf90.int1[i] = t.val90$conf.int[1]
	conf90.int2[i] = t.val90$conf.int[2]
	stderr[i] = t.val$stderr
	p.value[i] = t.val$p.value
	obs[i] = sum(!is.na(mmdata[[pretxvars[i]]]))
}

pretx = data.frame(estimate=estimate,conf.int1 = conf.int1, conf.int2 = conf.int2,conf90.int1 = conf90.int1,conf90.int2 = conf90.int2, vars=pretxvars,varnames=pretxvarnames)
pretx.rescale = pretx
pretx.rescale[which(pretx.rescale$varnames=="Landlessness 1932"),1:5]= pretx.rescale[which(pretx.rescale$varnames=="Landlessness 1932"),1:5]*10
pretx.rescale[which(pretx.rescale$varnames=="Smallholders 1932"),1:5]= pretx.rescale[which(pretx.rescale$varnames=="Smallholders 1932"),1:5]*10

pretx.apptable = data.frame(varnames = pretxvarnames, estimate = estimate, stderr = stderr, p.value = p.value, obs = obs)
pretx.apptable.rescale = pretx.apptable
pretx.apptable.rescale[which(pretx.apptable.rescale$varnames=="Landlessness 1932"),2:3]= pretx.apptable.rescale[which(pretx.apptable.rescale$varnames=="Landlessness 1932"),2:3]*10
pretx.apptable.rescale[which(pretx.apptable.rescale$varnames=="Smallholders 1932"),2:3]= pretx.apptable.rescale[which(pretx.apptable.rescale$varnames=="Smallholders 1932"),2:3]*10
print(pretx.apptable.rescale,digits=3)


## Step 10. Balance tests: Figure 5 analysis
balancevars = c("meanelevation","meanslope","soil_quality","lat","lon", "dist_river","lpop1940","d_madrid","roadraildensity")
balancevarnames = c("Elevation","Slope","Soil Quality","Latitude","Longitude","Distance to River","Population in 1940","Distance to Madrid","Road Density")

conf.int1 <- rep(NA, length(balancevars))
conf.int2 <- rep(NA, length(balancevars))
estimate <- rep(NA, length(balancevars))
conf90.int1 <- rep(NA, length(balancevars))
conf90.int2 <- rep(NA, length(balancevars))
stderr <- rep(NA, length(balancevars))
p.value <- rep(NA, length(balancevars))
obs <- rep(NA, length(balancevars))
for (i in 1:length(balancevars)){
	t.val = t.test(mmdata[[balancevars[i]]][which(mmdata$pobladosdummy==1)],
				mmdata[[balancevars[i]]][which(mmdata$pobladosdummy==0)])
	conf.int1[i] = t.val$conf.int[1]
	conf.int2[i] = t.val$conf.int[2]
	estimate[i] = t.val$estimate[1]-t.val$estimate[2]
	t.val90 = t.test(mmdata[[balancevars[i]]][which(mmdata$pobladosdummy==1)], 
				mmdata[[balancevars[i]]][which(mmdata$pobladosdummy==0)],conf.level=0.9)
	conf90.int1[i] = t.val90$conf.int[1]
	conf90.int2[i] = t.val90$conf.int[2]
	stderr[i] = t.val$stderr
	p.value[i] = t.val$p.value
	obs[i] = sum(!is.na(mmdata[[balancevars[i]]]))
}

balance = data.frame(estimate=estimate,conf.int1 = conf.int1, conf.int2 = conf.int2,conf90.int1 = conf90.int1,conf90.int2 = conf90.int2, vars=balancevars,varnames=balancevarnames)
balance.rescale = balance
balance.rescale[which(balance.rescale$varnames=="Elevation"),1:5]= balance.rescale[which(balance.rescale$varnames=="Elevation"),1:5]/100
balance.rescale[which(balance.rescale$varnames=="Soil Quality"),1:5]= balance.rescale[which(balance.rescale$varnames=="Soil Quality"),1:5]*100
balance.rescale[which(balance.rescale$varnames=="Distance to Madrid"),1:5]= balance.rescale[which(balance.rescale$varnames=="Distance to Madrid"),1:5]/100
balance.rescale[which(balance.rescale$varnames=="Road Density"),1:5]= balance.rescale[which(balance.rescale$varnames=="Road Density"),1:5]*10

balance.apptable = data.frame(varnames = balancevarnames, estimate = estimate, stderr = stderr, p.value = p.value, obs = obs)
balance.apptable.rescale = balance.apptable
balance.apptable.rescale[which(balance.apptable.rescale$varnames=="Elevation"),2:3]= balance.apptable.rescale[which(balance.apptable.rescale$varnames=="Elevation"),2:3]/100
balance.apptable.rescale[which(balance.apptable.rescale$varnames=="Soil Quality"),2:3]= balance.apptable.rescale[which(balance.apptable.rescale$varnames=="Soil Quality"),2:3]*100
balance.apptable.rescale[which(balance.apptable.rescale$varnames=="Distance to Madrid"),2:3]= balance.apptable.rescale[which(balance.apptable.rescale$varnames=="Distance to Madrid"),2:3]/100
balance.apptable.rescale[which(balance.apptable.rescale$varnames=="Road Density"),2:3]= balance.apptable.rescale[which(balance.apptable.rescale$varnames=="Road Density"),2:3]*10
print(balance.apptable.rescale,digits=3)


# 11. Potential compound treatment effects: Figure 6 analysis
compoundtx = c("av_socioec","log_income_med","popaftercol_secondtwodecades_pc")
compoundtxnames = c("Economic Well-Being 2001","Income Per Capita 2013","Population Change After Settlement")

stderr <- rep(NA, length(compoundtx))
p.value <- rep(NA, length(compoundtx))
obs <- rep(NA, length(compoundtx))
estimate_lm <- rep(NA, length(compoundtx))
conf.int1_lm <- rep(NA, length(compoundtx))
conf.int2_lm <- rep(NA, length(compoundtx))
conf90.int1_lm <- rep(NA, length(compoundtx))
conf90.int2_lm <- rep(NA, length(compoundtx))
for (i in 1:length(compoundtx)){
	m.1 <- lm_robust(mmdata[[compoundtx[i]]] ~ pobladosdummy + meanelevation + meanslope + soil_quality + lat + lon + dist_river + lpop1940 + d_madrid + roadraildensity  + as.factor(prov), mmdata, cluster = subclass)
	estimate_lm[i] = m.1$coefficients[which(names(m.1$coefficients)=="pobladosdummy")]
	conf.int1_lm[i] = m.1$conf.low[which(names(m.1$conf.low)=="pobladosdummy")]
	conf.int2_lm[i] = m.1$conf.high[which(names(m.1$conf.high)=="pobladosdummy")]
	m.1.90 <- lm_robust(mmdata[[compoundtx[i]]] ~ pobladosdummy + meanelevation + meanslope + soil_quality + lat + lon + dist_river + lpop1940 + d_madrid + roadraildensity  + as.factor(prov), mmdata, cluster = subclass,alpha=0.1)
	conf90.int1_lm[i] = m.1.90$conf.low[which(names(m.1.90$conf.low)=="pobladosdummy")]
	conf90.int2_lm[i] = m.1.90$conf.high[which(names(m.1.90$conf.high)=="pobladosdummy")]
	stderr[i] = m.1$std.error[which(names(m.1$coefficients)=="pobladosdummy")]
	p.value[i] = m.1$p.value[which(names(m.1$coefficients)=="pobladosdummy")]
	obs[i] = m.1$nobs
}

dfcompoundtx_lm = data.frame(estimate=estimate_lm,conf.int1 = conf.int1_lm, conf.int2 = conf.int2_lm, conf90.int1 = conf90.int1_lm,conf90.int2 = conf90.int2_lm, vars = compoundtx, varnames = compoundtxnames)
compoundtx.rescale = dfcompoundtx_lm
compoundtx.rescale[which(compoundtx.rescale$varnames=="Economic Well-Being 2001"),1:5]= compoundtx.rescale[which(compoundtx.rescale$varnames=="Economic Well-Being 2001"),1:5]/10

dfcompoundtx_lm.apptable = data.frame(varnames = compoundtxnames, estimate=estimate_lm, stderr = stderr, p.value = p.value, obs = obs)
print(dfcompoundtx_lm.apptable,digits=3)


## Step 12. Outcomes: Read instructions below for how to replicate remaining results

# *Note: Step 12/13 code can be used to produce Figure 7, Figure 8, and Figure 9 in paper, and figures in Appendix part III.1, Appendix part VI.1.C, and Appendix parts VI.2 and VI.3
# *After subsetting the data for each of Figures 8a, 8b, 8c, Figures 9a and 9b, and figures in Appendix part III.1 and Appendix part VI.1.C, return first to the full matched dataset by rerunning Steps 6-8. 

# To create Figure 7, simply proceed to the code in Steps 12A, 12B, 13D, and 13E

# To create Figure 8a, first run this snippet, followed by the code in Steps 12A, 12B, 13D, and 13E:
# mmdata = mmdata[-c(which(mmdata$subclass %in% mmdata$subclass[which(mmdata$pobladosdummy==1&mmdata$colonist_popshare_co<0.1)])),]
# To create Figure 8b, first return to the full matched dataset by rerunning Steps 6-8 and then run this snippet, followed by the code in Steps 12A, 12B, 13D, and 13E:
# mmdata = mmdata[-c(which(mmdata$subclass %in% mmdata$subclass[which(mmdata$pobladosdummy==1&(mmdata$ampliacion==1|mmdata$vivdis==1))])),]
# To create Figure 8c, first return to the full matched dataset by rerunning Steps 6-8 and then run this snippet, followed by the code in Steps 12A, 12B, 13D, and 13E:
# mmdata = mmdata[-c(which(mmdata$subclass %in% mmdata$subclass[which(mmdata$pobladosdummy==1&mmdata$p_coop_share==0)])),]

# To create Figure 9a, first return to the full matched dataset by rerunning Steps 6-8 and then run this snippet, followed by the code in Steps 12A, 12B, 13D, and 13E:
# mmdata = mmdata[-c(which(mmdata$subclass %in% mmdata$subclass[which(mmdata$pobladosdummy==1&(mmdata$landpercolono<10|is.infinite(mmdata$landpercolono)))])),]
# To create Figure 9b, first return to the full matched dataset by rerunning Steps 6-8 and then run this snippet, followed by the code in Steps 12A, 12B, 13D, and 13E:
# mmdata = mmdata[-c(which(mmdata$subclass %in% mmdata$subclass[which(mmdata$pobladosdummy==1&(mmdata$cost_percap<(524.71)|is.infinite(mmdata$cost_percap)))])),]

# To create figures in Appendix part III.1, dropping matched pairs that are farther than 25km from each other, first return to the full matched dataset by rerunning Steps 6-8 and then run this snippet, followed by the code in Steps 10, 12A, 12B, 13B, 13D, and 13E:
# mmdata = mmdata[-c(which(mmdata$subclass %in% mmdata$subclass[which(mmdata$distINCpob>25)])),]

# To create figures in Appendix part VI.1.C, limiting to matched pairs where the population did not grow in the decade of settlement or subsequent settlement, first return to the full matched dataset by rerunning Steps 6-8 and then run this snippet, followed by the code in Steps 10, 12A, 12B, 13B, 13D, and 13E:
# mmdata = mmdata[-c(which(mmdata$subclass %in% mmdata$subclass[which(mmdata$pobladosdummy==1&mmdata$poparoundcol_extradecade>0)])),]

# To create figures in Appendix parts VI.2 and VI.3, first return to the full matched dataset by rerunning Steps 6-8 and then proceed to the code in Steps 12D, 12E, 13F, and 13G

# A. Left vote share
leftshare = c("leftshare_1977","leftshare_1979","leftshare_1982","leftshare_1986", "leftshare_1989", "leftshare_1993")
leftsharenames = c("Left Vote Share (1977)","Left Vote Share (1979)", "Left Vote Share (1982)", "Left Vote Share (1986)", "Left Vote Share (1989)", "Left Vote Share (1993)")
years = c("1977","1979", "1982", "1986", "1989", "1993")

stderr <- rep(NA, length(leftshare))
p.value <- rep(NA, length(leftshare))
obs <- rep(NA, length(leftshare))
estimate_lm <- rep(NA, length(leftshare))
conf.int1_lm <- rep(NA, length(leftshare))
conf.int2_lm <- rep(NA, length(leftshare))
conf90.int1_lm <- rep(NA, length(leftshare))
conf90.int2_lm <- rep(NA, length(leftshare))
for (i in 1:length(leftshare)){
	m.1 <- lm_robust(mmdata[[leftshare[i]]] ~ pobladosdummy + meanelevation + meanslope + soil_quality + lat + lon + dist_river + lpop1940 + d_madrid + roadraildensity  + as.factor(prov), mmdata, cluster = subclass)
	estimate_lm[i] = m.1$coefficients[which(names(m.1$coefficients)=="pobladosdummy")]
	conf.int1_lm[i] = m.1$conf.low[which(names(m.1$conf.low)=="pobladosdummy")]
	conf.int2_lm[i] = m.1$conf.high[which(names(m.1$conf.high)=="pobladosdummy")]
	m.1.90 <- lm_robust(mmdata[[leftshare[i]]] ~ pobladosdummy + meanelevation + meanslope + soil_quality + lat + lon + dist_river + lpop1940 + d_madrid + roadraildensity  + as.factor(prov), mmdata, cluster = subclass,alpha=0.1)
	conf90.int1_lm[i] = m.1.90$conf.low[which(names(m.1.90$conf.low)=="pobladosdummy")]
	conf90.int2_lm[i] = m.1.90$conf.high[which(names(m.1.90$conf.high)=="pobladosdummy")]
	stderr[i] = m.1$std.error[which(names(m.1$coefficients)=="pobladosdummy")]
	p.value[i] = m.1$p.value[which(names(m.1$coefficients)=="pobladosdummy")]
	obs[i] = m.1$nobs
}

dfleftshare_lm = data.frame(estimate=estimate_lm,conf.int1 = conf.int1_lm, conf.int2 = conf.int2_lm, conf90.int1 = conf90.int1_lm,conf90.int2 = conf90.int2_lm, vars = leftshare, varnames = leftsharenames)

dfleftshare_lm.apptable = data.frame(varnames = leftsharenames, estimate=estimate_lm, stderr = stderr, p.value = p.value, obs = obs)
print(dfleftshare_lm.apptable,digits=3)

# B. Francoist vote share
aspshare = c("Francoistshare_1977","Francoistshare_1979","Francoistshare_1982","Francoistshare_1986", "Francoistshare_1989", "Francoistshare_1993")
aspsharenames = c("Francoist Vote Share (1977)","Francoist Vote Share (1979)", "Francoist Vote Share (1982)", "Francoist Vote Share (1986)", "Francoist Vote Share (1989)", "Francoist Vote Share (1993)")
years = c("1977","1979", "1982", "1986", "1989", "1993")

stderr <- rep(NA, length(aspshare))
p.value <- rep(NA, length(aspshare))
obs <- rep(NA, length(aspshare))
estimate_lm <- rep(NA, length(aspshare))
conf.int1_lm <- rep(NA, length(aspshare))
conf.int2_lm <- rep(NA, length(aspshare))
conf90.int1_lm <- rep(NA, length(aspshare))
conf90.int2_lm <- rep(NA, length(aspshare))
for (i in 1:length(aspshare)){
	m.1 <- lm_robust(mmdata[[aspshare[i]]] ~ pobladosdummy + meanelevation + meanslope + soil_quality + lat + lon + dist_river + lpop1940 + d_madrid + roadraildensity  + as.factor(prov), mmdata, cluster = subclass)
	estimate_lm[i] = m.1$coefficients[which(names(m.1$coefficients)=="pobladosdummy")]
	conf.int1_lm[i] = m.1$conf.low[which(names(m.1$conf.low)=="pobladosdummy")]
	conf.int2_lm[i] = m.1$conf.high[which(names(m.1$conf.high)=="pobladosdummy")]
	m.1.90 <- lm_robust(mmdata[[aspshare[i]]] ~ pobladosdummy + meanelevation + meanslope + soil_quality + lat + lon + dist_river + lpop1940 + d_madrid + roadraildensity + as.factor(prov), mmdata, cluster = subclass,alpha=0.1)
	conf90.int1_lm[i] = m.1.90$conf.low[which(names(m.1.90$conf.low)=="pobladosdummy")]
	conf90.int2_lm[i] = m.1.90$conf.high[which(names(m.1.90$conf.high)=="pobladosdummy")]
	stderr[i] = m.1$std.error[which(names(m.1$coefficients)=="pobladosdummy")]
	p.value[i] = m.1$p.value[which(names(m.1$coefficients)=="pobladosdummy")]
	obs[i] = m.1$nobs
}

dfaspshare_lm = data.frame(estimate=estimate_lm,conf.int1 = conf.int1_lm, conf.int2 = conf.int2_lm, conf90.int1 = conf90.int1_lm,conf90.int2 = conf90.int2_lm, vars = aspshare, varnames = aspsharenames)

dfaspshare_lm.apptable = data.frame(varnames = aspsharenames, estimate=estimate_lm, stderr = stderr, p.value = p.value, obs = obs)
print(dfaspshare_lm.apptable,digits=3)

# C. NATO vote share: Placebo test referenced in paper around Figure 7
natoshare = c("si_1986_share","no_1986_share")
natosharenames = c("NATO Yes Vote Share","NATO No Vote Share")
years = c("1986","1986")

stderr <- rep(NA, length(natoshare))
p.value <- rep(NA, length(natoshare))
obs <- rep(NA, length(natoshare))
estimate_lm <- rep(NA, length(natoshare))
conf.int1_lm <- rep(NA, length(natoshare))
conf.int2_lm <- rep(NA, length(natoshare))
conf90.int1_lm <- rep(NA, length(natoshare))
conf90.int2_lm <- rep(NA, length(natoshare))
for (i in 1:length(natoshare)){
	m.1 <- lm_robust(mmdata[[natoshare[i]]] ~ pobladosdummy + meanelevation + meanslope + soil_quality + lat + lon + dist_river + lpop1940 + d_madrid + roadraildensity  + as.factor(prov), mmdata, cluster = subclass)
	estimate_lm[i] = m.1$coefficients[which(names(m.1$coefficients)=="pobladosdummy")]
	conf.int1_lm[i] = m.1$conf.low[which(names(m.1$conf.low)=="pobladosdummy")]
	conf.int2_lm[i] = m.1$conf.high[which(names(m.1$conf.high)=="pobladosdummy")]
	m.1.90 <- lm_robust(mmdata[[natoshare[i]]] ~ pobladosdummy + meanelevation + meanslope + soil_quality + lat + lon + dist_river + lpop1940 + d_madrid + roadraildensity  + as.factor(prov), mmdata, cluster = subclass,alpha=0.1)
	conf90.int1_lm[i] = m.1.90$conf.low[which(names(m.1.90$conf.low)=="pobladosdummy")]
	conf90.int2_lm[i] = m.1.90$conf.high[which(names(m.1.90$conf.high)=="pobladosdummy")]
	stderr[i] = m.1$std.error[which(names(m.1$coefficients)=="pobladosdummy")]
	p.value[i] = m.1$p.value[which(names(m.1$coefficients)=="pobladosdummy")]
	obs[i] = m.1$nobs
}

dfnatoshare_lm = data.frame(estimate=estimate_lm,conf.int1 = conf.int1_lm, conf.int2 = conf.int2_lm, conf90.int1 = conf90.int1_lm,conf90.int2 = conf90.int2_lm, vars = natoshare, varnames = natosharenames)

dfnatoshare_lm.apptable = data.frame(varnames = natosharenames, estimate=estimate_lm, stderr = stderr, p.value = p.value, obs = obs)
print(dfnatoshare_lm.apptable,digits=3)

# D. Voter turnout. From Appendix Part VI.2
voterturnout = c("voter_turnout1977","voter_turnout1979","voter_turnout1982","voter_turnout1986", "voter_turnout1989", "voter_turnout1993")
voterturnoutnames = c("Voter Turnout (1977)","Voter Turnout (1979)", "Voter Turnout (1982)", "Voter Turnout (1986)", "Voter Turnout (1989)", "Voter Turnout (1993)")
years = c("1977","1979", "1982", "1986", "1989", "1993")

estimate_lm <- rep(NA, length(voterturnout))
conf.int1_lm <- rep(NA, length(voterturnout))
conf.int2_lm <- rep(NA, length(voterturnout))
conf90.int1_lm <- rep(NA, length(voterturnout))
conf90.int2_lm <- rep(NA, length(voterturnout))
for (i in 1:length(voterturnout)){
		m.3 <- lm_robust(mmdata[[voterturnout[i]]] ~ pobladosdummy + meanelevation + meanslope + soil_quality + lat + lon + dist_river + lpop1940 + d_madrid + roadraildensity + as.factor(prov), mmdata, cluster = subclass)
	estimate_lm[i] = m.3$coefficients[which(names(m.3$coefficients)=="pobladosdummy")]
	conf.int1_lm[i] = m.3$conf.low[which(names(m.3$conf.low)=="pobladosdummy")]
	conf.int2_lm[i] = m.3$conf.high[which(names(m.3$conf.high)=="pobladosdummy")]
	m.3.90 <- lm_robust(mmdata[[voterturnout[i]]] ~ pobladosdummy + meanelevation + meanslope + soil_quality + lat + lon + dist_river + lpop1940 + d_madrid + roadraildensity + as.factor(prov), mmdata, cluster = subclass,alpha=0.1)
	conf90.int1_lm[i] = m.3.90$conf.low[which(names(m.3.90$conf.low)=="pobladosdummy")]
	conf90.int2_lm[i] = m.3.90$conf.high[which(names(m.3.90$conf.high)=="pobladosdummy")]
}

dfvoterturnout_lm = data.frame(estimate=estimate_lm,conf.int1 = conf.int1_lm, conf.int2 = conf.int2_lm, conf90.int1 = conf90.int1_lm,conf90.int2 = conf90.int2_lm, vars = voterturnout, varnames = voterturnoutnames)

# E. Political and economic equilibrium biased toward large landowners. From Appendix Part VI.3.
reconqvars = c("rec_rate","highrate0")
reconqvarnames = c("Reconquest Rate","High Rate of Reconquest")

conf.int1 <- rep(NA, length(reconqvars))
conf.int2 <- rep(NA, length(reconqvars))
estimate <- rep(NA, length(reconqvars))
conf90.int1 <- rep(NA, length(reconqvars))
conf90.int2 <- rep(NA, length(reconqvars))
for (i in 1:length(reconqvars)){
	t.val = t.test(mmdata[[reconqvars[i]]][which(mmdata$pobladosdummy==1)],
				mmdata[[reconqvars[i]]][which(mmdata$pobladosdummy==0)])
	conf.int1[i] = t.val$conf.int[1]
	conf.int2[i] = t.val$conf.int[2]
	estimate[i] = t.val$estimate[1]-t.val$estimate[2]
	t.val90 = t.test(mmdata[[reconqvars[i]]][which(mmdata$pobladosdummy==1)], 
				mmdata[[reconqvars[i]]][which(mmdata$pobladosdummy==0)],conf.level=0.9)
	conf90.int1[i] = t.val90$conf.int[1]
	conf90.int2[i] = t.val90$conf.int[2]
}

reconq = data.frame(estimate=estimate,conf.int1 = conf.int1, conf.int2 = conf.int2,conf90.int1 = conf90.int1,conf90.int2 = conf90.int2, vars=reconqvars,varnames=reconqvarnames)
reconq.rescale = reconq
reconq.rescale[which(reconq.rescale$varnames=="Reconquest Rate"),1:5]= reconq.rescale[which(reconq.rescale$varnames=="Reconquest Rate"),1:5]/10


## Step 13. Plotting results

setwd("C:/Users/NFran/Dropbox/JOP replication analyst/workspace/11.9.2022, 220204_R1/replication")

# A. Figure 4: Political characteristics prior to land settlement
ggplot(data = pretx.rescale, aes(x=reorder(pretxvarnames,c(-1,-2,-3,-4,-5,-6)))) +
     geom_point(aes(y=estimate),size = 1.3) +
     geom_errorbar(aes(ymin = conf.int1, ymax = conf.int2), size = 0.5, width = 0)+
     geom_errorbar(aes(ymin = conf90.int1, ymax = conf90.int2), size = 0.75, width = 0)+
     geom_hline(yintercept = 0) +
     theme_bw() +
     coord_flip() + 
     scale_y_continuous(name = "Treatment - Control Difference") + xlab("Pre-Treatment Variables")+
     ggsave("pretx.pdf",height=3,width=7,units="in",dpi=300)
     
# B. Figure 5: Balance tests 
ggplot(data = balance.rescale, aes(x=reorder(varnames,desc(varnames)))) +
     geom_point(aes(y=estimate),size = 1.3) +
     geom_errorbar(aes(ymin = conf.int1, ymax = conf.int2), size = 0.5, width = 0)+
     geom_errorbar(aes(ymin = conf90.int1, ymax = conf90.int2), size = 0.75, width = 0)+
     geom_hline(yintercept = 0) +
     theme_bw() +
     coord_flip() + 
     scale_y_continuous(name = "Treatment - Control Difference") + xlab("Covariates") +
     ggsave("balance.pdf",height=4,width=7,units="in",dpi=300)

# C. Figure 6: Potential compound treatments 
ggplot(data = compoundtx.rescale, aes(x=reorder(varnames,desc(varnames)))) +
     geom_point(aes(y=estimate),size = 1.3) +
     geom_errorbar(aes(ymin = conf.int1, ymax = conf.int2), size = 0.5, width = 0)+
     geom_errorbar(aes(ymin = conf90.int1, ymax = conf90.int2), size = 0.75, width = 0)+
     geom_hline(yintercept = 0) +
     theme_bw() +
     coord_flip() + 
     scale_y_continuous(name = "Treatment - Control Difference") + xlab("") +
     ggsave("compoundtx.pdf",height=2,width=7,units="in",dpi=300)
     
# D. Figure 7a: Left vote share 
ggplot(data = dfleftshare_lm, aes(x=years)) +
     geom_point(aes(y=estimate),size = 1.3) +
     geom_errorbar(aes(ymin = conf.int1, ymax = conf.int2), size = 0.5, width = 0)+
     geom_errorbar(aes(ymin = conf90.int1, ymax = conf90.int2), size = 0.75, width = 0)+
     geom_hline(yintercept = 0) +
     theme_bw() +
     scale_y_continuous(name = "Difference in Left Vote Share") + xlab("Year") +
     ggsave("leftvote.pdf",height=4,width=7,units="in",dpi=300)

# E. Figure 7b: Francoist vote share
ggplot(data = dfaspshare_lm, aes(x=years)) +
     geom_point(aes(y=estimate),size = 1.3) +
     geom_errorbar(aes(ymin = conf.int1, ymax = conf.int2), size = 0.5, width = 0)+
     geom_errorbar(aes(ymin = conf90.int1, ymax = conf90.int2), size = 0.75, width = 0)+
     geom_hline(yintercept = 0) +
     theme_bw() +
     scale_y_continuous(name = "Difference in Francoist Vote Share") + xlab("Year")+
     ggsave("Francoistvote.pdf",height=4,width=7,units="in",dpi=300)

# F. Figure in Appendix Part VI.2: Voter turnout 
ggplot(data = dfvoterturnout_lm, aes(x=years)) +
     geom_point(aes(y=estimate),size = 1.3) +
     geom_errorbar(aes(ymin = conf.int1, ymax = conf.int2), size = 0.5, width = 0)+
     geom_errorbar(aes(ymin = conf90.int1, ymax = conf90.int2), size = 0.75, width = 0)+
     geom_hline(yintercept = 0) +
     theme_bw() +
     scale_y_continuous(name = "Difference in Voter Turnout") + xlab("Year") +
     ggsave("voterturnout.pdf",height=4,width=7,units="in",dpi=300)
   
# G. Figure in Appendix Part VI.3: Political and economic equilibrium biased toward large landowners 
ggplot(data = reconq.rescale, aes(x=reorder(varnames,desc(varnames)))) +
     geom_point(aes(y=estimate),size = 1.3) +
     geom_errorbar(aes(ymin = conf.int1, ymax = conf.int2), size = 0.5, width = 0)+
     geom_errorbar(aes(ymin = conf90.int1, ymax = conf90.int2), size = 0.75, width = 0)+
     geom_hline(yintercept = 0) +
     theme_bw() +
     coord_flip() + 
     scale_y_continuous(name = "Treatment - Control Difference") + xlab("Covariates") +
     ggsave("landownerbias.pdf",height=4,width=7,units="in",dpi=300)


## Step 14. Results using census-tract level data: Appendix Part IV

setwd("C:/Users/NFran/Dropbox/JOP replication analyst/workspace/11.9.2022, 220204_R1/replication")

data_ct<-read.csv(file="data_censustractslr.csv", header=TRUE, sep=",")

# subset to treatment munis
mmdata = subset(data_ct,pobladosdummy==1)

# A. Left vote share for poblados in census tracts as treatment
leftshare = c("leftshare_1982","leftshare_1986", "leftshare_1989")
leftsharenames = c( "Left Vote Share (1982)", "Left Vote Share (1986)", "Left Vote Share (1989)")
years = c("1982", "1986", "1989")

stderr <- rep(NA, length(leftshare))
p.value <- rep(NA, length(leftshare))
obs <- rep(NA, length(leftshare))
estimate_lm <- rep(NA, length(leftshare))
conf.int1_lm <- rep(NA, length(leftshare))
conf.int2_lm <- rep(NA, length(leftshare))
conf90.int1_lm <- rep(NA, length(leftshare))
conf90.int2_lm <- rep(NA, length(leftshare))
for (i in 1:length(leftshare)){
	m.1 <- lm_robust(mmdata[[leftshare[i]]] ~ pobladosdummy_ct + as.factor(INE_2011t), mmdata,se_type="HC1")
	estimate_lm[i] = m.1$coefficients[which(names(m.1$coefficients)=="pobladosdummy_ct")]
	conf.int1_lm[i] = m.1$conf.low[which(names(m.1$conf.low)=="pobladosdummy_ct")]
	conf.int2_lm[i] = m.1$conf.high[which(names(m.1$conf.high)=="pobladosdummy_ct")]
	m.1.90 <- lm_robust(mmdata[[leftshare[i]]] ~ pobladosdummy_ct + as.factor(INE_2011t), mmdata,alpha=0.1,se_type="HC1")
	conf90.int1_lm[i] = m.1.90$conf.low[which(names(m.1.90$conf.low)=="pobladosdummy_ct")]
	conf90.int2_lm[i] = m.1.90$conf.high[which(names(m.1.90$conf.high)=="pobladosdummy_ct")]
	stderr[i] = m.1$std.error[which(names(m.1$coefficients)=="pobladosdummy_ct")]
	p.value[i] = m.1$p.value[which(names(m.1$coefficients)=="pobladosdummy_ct")]
	obs[i] = m.1$nobs
}

dfleftshare_lm = data.frame(estimate=estimate_lm,conf.int1 = conf.int1_lm, conf.int2 = conf.int2_lm, conf90.int1 = conf90.int1_lm,conf90.int2 = conf90.int2_lm, vars = leftshare, varnames = leftsharenames)

dfleftshare_lm.apptable = data.frame(varnames = leftsharenames, estimate=estimate_lm, stderr = stderr, p.value = p.value, obs = obs)
print(dfleftshare_lm.apptable,digits=3)

# B. Francoist vote share for poblados in census tracts as treatment
aspshare = c("Francoistshare_1982","Francoistshare_1986", "Francoistshare_1989")
aspsharenames = c("Francoist Vote Share (1982)", "Francoist Vote Share (1986)", "Francoist Vote Share (1989)")
years = c("1982", "1986", "1989")

stderr <- rep(NA, length(aspshare))
p.value <- rep(NA, length(aspshare))
obs <- rep(NA, length(aspshare))
estimate_lm <- rep(NA, length(aspshare))
conf.int1_lm <- rep(NA, length(aspshare))
conf.int2_lm <- rep(NA, length(aspshare))
conf90.int1_lm <- rep(NA, length(aspshare))
conf90.int2_lm <- rep(NA, length(aspshare))
for (i in 1:length(aspshare)){
	m.1 <- lm_robust(mmdata[[aspshare[i]]] ~ pobladosdummy_ct + as.factor(INE_2011t), mmdata,se_type="HC1")
	estimate_lm[i] = m.1$coefficients[which(names(m.1$coefficients)=="pobladosdummy_ct")]
	conf.int1_lm[i] = m.1$conf.low[which(names(m.1$conf.low)=="pobladosdummy_ct")]
	conf.int2_lm[i] = m.1$conf.high[which(names(m.1$conf.high)=="pobladosdummy_ct")]
	m.1.90 <- lm_robust(mmdata[[aspshare[i]]] ~ pobladosdummy_ct + as.factor(INE_2011t), mmdata,alpha=0.1,se_type="HC1")
	conf90.int1_lm[i] = m.1.90$conf.low[which(names(m.1.90$conf.low)=="pobladosdummy_ct")]
	conf90.int2_lm[i] = m.1.90$conf.high[which(names(m.1.90$conf.high)=="pobladosdummy_ct")]
	stderr[i] = m.1$std.error[which(names(m.1$coefficients)=="pobladosdummy_ct")]
	p.value[i] = m.1$p.value[which(names(m.1$coefficients)=="pobladosdummy_ct")]
	obs[i] = m.1$nobs
}

dfaspshare_lm = data.frame(estimate=estimate_lm,conf.int1 = conf.int1_lm, conf.int2 = conf.int2_lm, conf90.int1 = conf90.int1_lm,conf90.int2 = conf90.int2_lm, vars = aspshare, varnames = aspsharenames)

dfaspshare_lm.apptable = data.frame(varnames = aspsharenames, estimate=estimate_lm, stderr = stderr, p.value = p.value, obs = obs)
print(dfaspshare_lm.apptable,digits=3)

# Appendix Figure IV.A: Left vote share 
ggplot(data = dfleftshare_lm, aes(x=years)) +
     geom_point(aes(y=estimate),size = 1.3) +
     geom_errorbar(aes(ymin = conf.int1, ymax = conf.int2), size = 0.5, width = 0)+
     geom_errorbar(aes(ymin = conf90.int1, ymax = conf90.int2), size = 0.75, width = 0)+
     geom_hline(yintercept = 0) +
     theme_bw() +
     scale_y_continuous(name = "Difference in Left Vote Share") + xlab("Year") +
     ggsave("leftvote_ct.pdf",height=4,width=7,units="in",dpi=300)

# Appendix Figure IV.B: Francoist vote share
ggplot(data = dfaspshare_lm, aes(x=years)) +
     geom_point(aes(y=estimate),size = 1.3) +
     geom_errorbar(aes(ymin = conf.int1, ymax = conf.int2), size = 0.5, width = 0)+
     geom_errorbar(aes(ymin = conf90.int1, ymax = conf90.int2), size = 0.75, width = 0)+
     geom_hline(yintercept = 0) +
     theme_bw() +
     scale_y_continuous(name = "Difference in Francoist Vote Share") + xlab("Year")+
     ggsave("Francoistvote_ct.pdf",height=4,width=7,units="in",dpi=300)
   

